import pysda
import os
import pandas as pd
import geopandas as gpd
# Location of the sample data and the coordinate reference system shared by
# the CSV-based loaders below.
folder = r"D:\pySDA\test_data"
crs = "+init=epsg:4326"

# Each of the four loaders below rebinds pysda_data, so only the result of the
# last one (readGDF) is what the following steps actually use.

# 1) Read the points directly from a CSV file.
filename = "DengueKS2014.csv"
path = os.path.join(folder, filename)
pysda_data = pysda.data.readCSV(
    path, xtitle="X", ytitle="Y", ttitle="OnsetDay", crs=crs, tunit="day"
)

# 2) Read the same CSV into a pandas DataFrame first, then hand it to pysda.
df = pd.read_csv(path, encoding="utf-8")
pysda_data = pysda.data.readDF(
    df, xtitle="X", ytitle="Y", ttitle="OnsetDay", crs=crs, tunit="day"
)

# 3) Read the points directly from a shapefile (no x/y/crs arguments are
#    passed here, only the time column and unit).
filename = "DengueKS2014.shp"
path = os.path.join(folder, filename)
pysda_data = pysda.data.readSHP(path, ttitle="OnsetDay", tunit="day")

# 4) Read the same shapefile with geopandas first, then hand it to pysda.
gdf = gpd.read_file(path, encoding="utf-8")
pysda_data = pysda.data.readGDF(gdf, ttitle="OnsetDay", tunit="day")
crs is the coordinate reference system (CRS) of the x and y coordinates.
ttitle is the name of the column that records the time of each point, either in integer format or in date format. If its values are integers, pysda uses them directly as the time stamps; otherwise, pysda first converts them to integers (according to the tunit argument).
tunit is the temporal resolution for analysis, and the first time stamp is the first date in the input data. There are several choices:
# Create the MST-DBSCAN analysis object from the point data loaded above.
mst = pysda.MSTDBSCAN(pysda_data)
movingRatio (default is 0.1): The threshold used to decide whether a cluster's center has moved. When the distance between the cluster's centers at two consecutive times, divided by eps_spatial, is greater than this value, the cluster is considered to have moved its center; otherwise, it is considered to have stayed in place.
areaRatio (default is 0.1): The threshold used to decide whether a cluster's area has changed. When the absolute difference in area between two consecutive times is greater than areaRatio, the cluster is considered to have changed its area; otherwise, it is considered to have kept a similar area.
# Parameters handed to mst.setParams() below.
# NOTE(review): the meaning and units of eps_spatial, eps_temporalLow,
# eps_temporalHigh and min_pts are not described in this file -- presumably
# the spatial/temporal neighbourhood bounds and the minimum point count of the
# clustering; confirm against the pysda documentation.
eps_spatial = 300
eps_temporalLow = 1
eps_temporalHigh = 2
min_pts = 3
movingRatio = 0.1  # threshold for deciding whether a cluster's center moved
areaRatio = 0.1  # threshold for deciding whether a cluster's area changed
# The arguments are passed positionally, so their order must not be changed.
mst.setParams(eps_spatial, eps_temporalLow, eps_temporalHigh, min_pts, movingRatio, areaRatio)
# Run the analysis, then keep a handle on the result object that the
# following steps query.
mst.run()
result = mst.result
Explanation of the columns:
clusterID: the ID of a cluster.
(Note: the value starts from 0, so 0 is the first cluster.)
mstTime: a specific time (integer format) when a point is still alive.
mstDate: a specific time (date format) when a point is still alive.
type: the evolution type of a cluster at a specific time.
(Note: There are 10 possible types. Please refer to the paper for a detailed description.)
centerX, centerY: the coordinate of the center of a cluster at a specific time.
shape: the polygon of a cluster at a specific time.
# Cluster-level output of the analysis (presumably a GeoDataFrame, judging by
# the name -- confirm).
clusterGDF = result.clusters
# Preview call; its return value is not stored or printed, so it is only
# shown when run interactively (e.g. as the last expression of a notebook cell).
clusterGDF.head(10)
Explanation of the columns:
pointID: the original ID of a point.
mstTime: a specific time (integer format) when a point is still alive.
mstDate: a specific time (date format) when a point is still alive.
clusterID: the cluster to which a point belongs at a specific time.
(Note: -1 means the point does not belong to any cluster, so its role must be noise.)
role: the role (core, border or noise) of a point at a specific time.
# Point-level output of the analysis (presumably a GeoDataFrame, judging by
# the name -- confirm).
pointGDF = result.points
# Preview call; its return value is not stored or printed, so it is only
# shown when run interactively (e.g. as the last expression of a notebook cell).
pointGDF.head(10)
# Load a polygon layer from the data folder and register it with the result
# object.
# NOTE(review): presumably this is what makes result.polygons available
# afterwards -- confirm against the pysda documentation.
filename = "KSVillage.shp"
path = os.path.join(folder, filename)
gdf = gpd.read_file(path, encoding="utf-8")
result.setPolygons(gdf)
Explanation of the columns:
DZ: diffusion zones. The polygons that are assigned to the same zone undergo a similar diffusion procedure.
The date columns: each column name is an mstDate. Each column records the situation (increase, decrease, keep, or no cluster) that every polygon undergoes at that time.
# Polygon-level output of the analysis (presumably a GeoDataFrame, judging by
# the name -- confirm).
polygonResultGDF = result.polygons
# Preview call; its return value is not stored or printed, so it is only
# shown when run interactively (e.g. as the last expression of a notebook cell).
polygonResultGDF.head(10)
# Fetch every result table in one call; the returned object is indexed by the
# keys "clusters", "points" and "polygons".
allResults = result.getAll()
clusters = allResults["clusters"]
# Preview calls below are only displayed when run interactively.
clusters.head(10)
points = allResults["points"]
points.head(10)
polygons = allResults["polygons"]
polygons.head(10)
# Persist the results into the data folder, using "mst_" as the prefix.
# NOTE(review): output file names/formats are decided by pysda -- confirm.
result.saveAll(folder, prefix="mst_")
# Save an animation of the results into the same folder with the same prefix;
# figsize is passed as (8, 16).
result.saveAnimation(figsize=(8,16), dirpath=folder, prefix="mst_")